The dataset used is linked here. For SMO, this post uses the MATLAB file provided by the TA of the machine-learning course; because it calls MATLAB functions, MATLAB itself must be installed, along with the MATLAB Engine API for Python.
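As a quick check that the engine package works before running the script below, a minimal sketch (start_matlab and quit are the standard engine calls; sqrt is just a convenient built-in to round-trip a value through MATLAB):

import matlab.engine
eng = matlab.engine.start_matlab() #launches a background MATLAB session
print(eng.sqrt(4.0)) #calls MATLAB's sqrt through the engine; should print 2.0
eng.quit()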
Two different kernels are used so the results can be compared.
import numpy as np
from numpy.linalg import inv, eig
import matplotlib.pyplot as plt
import matlab
import matlab.engine
#data preprocessing
data = []
name = {'VERSICOL':0 , 'SETOSA':1, 'VIRGINIC':2}
with open("Iris_data.csv", 'r') as infile:
for line in infile:
        line = line.strip().split(',') #drop the trailing newline (and any \r), then split the CSV fields
line[-1] = name[line[-1]]
data += [line]
data = np.asarray(data, dtype = float)
class svm:
def __init__(self, kernel = 0):
self.sv = []
        self.w_and_b = None #filled in by fit()
self.kernel = kernel
        if self.kernel not in (0, 1):
            raise ValueError('kernel must be 0 (linear) or 1 (polynomial)')
self.eng = matlab.engine.start_matlab()
def phi(self, x):
if self.kernel == 0: #linear
return np.asarray(x).reshape(2,1)
        else: #homogeneous polynomial of degree 2, so phi(x).T @ phi(z) == (x @ z)**2
            return np.array([x[0]**2, np.sqrt(2)*x[0]*x[1], x[1]**2]).reshape(3,1)
def compute_kernel(self, data):
n = len(data)
out = np.zeros((n,n))
for i in range(n):
for j in range(n):
out[i][j] = self.phi(data[i]).T.dot(self.phi(data[j]))
return out
    def fit_each(self, data, y, c, threshold): #y holds the labels and must be +1 / -1
n = len(data)
k = self.compute_kernel(data)
## pass value to matlab
k_mat = matlab.double(k.tolist())
y_mat = matlab.double(y.tolist())
        y = y[0] #flatten the (1, n) target row to a 1-D array
        ##get coefficients; the TA's smo.m is assumed to return the Lagrange multipliers as its first output
        alpha = np.asarray(self.eng.smo(k_mat, y_mat, c, threshold)[0]).reshape(n)
        C = c/n #per-sample box constraint, used below to separate margin SVs from bound SVs
        ## record every support vector (alpha > 0) exactly once
        for j in range(n):
            if alpha[j] > 0.:
                self.sv += [data[j]]
        ## compute bias: b = mean over margin SVs of (y_i - sum_j alpha_j * y_j * k(x_i, x_j))
        Nm = 0 #num of margin support vectors (0 < alpha < C)
        bias = 0.
        for i in range(n):
            if alpha[i] == 0. or alpha[i] == C: #assumes smo clamps bound alphas to exactly 0 or C
                continue
            Nm += 1
            tmp = 0.
            for j in range(n): #sum over all support vectors (alpha > 0)
                if alpha[j] == 0.:
                    continue
                tmp += alpha[j] * y[j] * k[i][j]
            bias += (y[i] - tmp)
        bias /= Nm
## compute w
w = np.zeros((self.phi(data[0]).shape[0],1))
for i in range(n):
w += alpha[i] * y[i] * self.phi(data[i])
return w, bias
def fit(self, data, target, c, threshold):
k = int(np.amax(target)) + 1 #num of classes
self.num_of_class = k
w_and_b = []
for s in range(k):
for t in range(k):
                if s <= t: #one classifier per unordered pair (s, t)
continue
tmp_data = []
tmp_target = []
for n in range(len(data)):
if target[n] == s:
tmp_data += [data[n]]
tmp_target += [1]
elif target[n] == t:
tmp_data += [data[n]]
tmp_target += [-1]
else:
continue
tmp_target = np.asarray(tmp_target).reshape(1,len(tmp_target))
tmp_data = np.asarray(tmp_data)
w, b = self.fit_each(tmp_data, tmp_target, c, threshold)
w_and_b += [[w,b,s,t]]
self.w_and_b = w_and_b
def predict(self, x):
        if self.w_and_b is None:
            raise RuntimeError('must fit a dataset first')
vote = [0] * self.num_of_class
for each in range(len(self.w_and_b)):
p = self.w_and_b[each][0].T.dot(self.phi(x)) + self.w_and_b[each][1]
if p >= 0:
vote[self.w_and_b[each][2]] += 1
else:
vote[self.w_and_b[each][3]] += 1
return vote.index(max(vote))
def batch_predict(self, xx, yy):
        if self.w_and_b is None:
            raise RuntimeError('must fit a dataset first')
out = np.zeros(xx.shape, dtype = int)
for r in range(xx.shape[0]):
for c in range(xx.shape[1]):
out[r][c] = self.predict([ xx[r][c], yy[r][c] ])
return out
def plot(self, train, target, c, threshold, name):
self.fit(train, target, c, threshold)
step = 0.01
x_min, x_max = train[:, 0].min() - 1, train[:, 0].max() + 1
y_min, y_max = train[:, 1].min() - 1, train[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, step), np.arange(y_min, y_max, step))
dist = self.batch_predict(xx,yy)
color = ['red', 'blue' ,'limegreen']
        sv = np.asarray(self.sv)
plt.figure()
plt.contourf(xx, yy, dist, alpha=0.3, levels=np.arange(dist.max()+2)-0.5, antialiased=True, colors = color)
plt.scatter(sv[:,0], sv[:,1], color = 'black', marker = 'o')
ax = []
bx = []
cx = []
v = [ax, bx, cx]
for i in range(len(train)):
p = self.predict(train[i])
v[p] += [train[i]]
ax = np.asarray(ax)
bx = np.asarray(bx)
cx = np.asarray(cx)
plt.scatter(ax[:,0], ax[:,1], color = 'red', marker = '+')
plt.scatter(bx[:,0], bx[:,1], color = 'blue', marker = 'x')
plt.scatter(cx[:,0], cx[:,1], color = 'limegreen', marker = '*')
plt.savefig('%s_%d.png'%(name,self.kernel))
target = data[:,-1]
train = data[:,:2]
for i in range(2):
t = svm(i)
t.plot(train ,target, 5., 0., "result")
error = 0
for n in range(len(train)):
if t.predict(train[n]) != target[n]:
error += 1
    print('error:', error)
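As an aside, the homogeneous degree-2 map in phi is the explicit feature map of the kernel k(x, z) = (x.T @ z)**2; a small standalone check of that identity (phi2 here simply restates svm.phi for kernel 1 as a flat vector):

import numpy as np

def phi2(x): #restates svm.phi for kernel == 1, without the column reshape
    return np.array([x[0]**2, np.sqrt(2) * x[0] * x[1], x[1]**2])

rng = np.random.default_rng(0)
for _ in range(5):
    x, z = rng.normal(size=2), rng.normal(size=2)
    assert np.isclose(phi2(x) @ phi2(z), (x @ z) ** 2) #k(x, z) = (x.T z)^2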
This produces the figures below (the darker points are the support vectors), along with the misclassification counts:
linear kernel: 32 misclassified samples
polynomial kernel: 27 misclassified samples
If we instead take all four feature dimensions into account, reduce them to two dimensions with LDA, and run the SVM again, we get the following results (a sketch of the LDA step follows the counts below):
linear kernel: 27 misclassified samples
polynomial kernel: 6 misclassified samples
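The post does not show the LDA code itself, so here is a minimal sketch of that projection using scikit-learn as a stand-in, assuming the CSV holds the four features followed by the class label:

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

lda = LinearDiscriminantAnalysis(n_components=2) #3 classes allow at most 2 discriminant axes
train_lda = lda.fit_transform(data[:, :4], data[:, -1]) #project all 4 Iris features to 2-D
#train_lda can then be passed to svm's plot() in place of the 2-D slice used above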
The resulting plots are: